import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

from sklearn.preprocessing import OneHotEncoder

# Plot text setup: use the SimHei font for sans-serif text so Chinese labels
# can render, and draw minus signs with the ASCII hyphen instead of the
# Unicode minus glyph.
mpl.rcParams.update({
    'font.sans-serif': [u'simHei'],
    'axes.unicode_minus': False,
})

# Location of the input file, relative to the working directory.
path = '../data/car.data'
# The file is read without a header row, so columns are labelled 0..n-1.
data = pd.read_csv(path, header=None)

### Raw data
# A bare `data.head(5)` only displays in an interactive session; print it so
# the preview is also shown when this file runs as a script.
print(data.head(5))

### Convert string values to sequence ids (integers)
def _category_codes(column):
    """Return the integer category codes of a single column."""
    return pd.Categorical(column).codes


# Applied column by column, so every column gets its own code mapping.
X = data.apply(_category_codes)
print(X.head(5))

### Apply one-hot (dummy) encoding
enc = OneHotEncoder()
X = enc.fit_transform(X)

# Report how many distinct values each feature has.
# `n_values_` was deprecated in scikit-learn 0.20 and removed in 0.22, so the
# counts are derived from `categories_` when it exists; the old attribute is
# only used as a fallback on releases that predate `categories_`.
if hasattr(enc, 'categories_'):
    print(np.array([len(feature_categories) for feature_categories in enc.categories_]))
else:
    print(enc.n_values_)

### Data after the transformation
# `toarray()` densifies the result, since OneHotEncoder's default output is a
# sparse matrix.
print(pd.DataFrame(X.toarray()).head(5))